* This file produces runs the analysis in Table A4 and outputs to an excel spreadsheet

use "$saveddata/data_complete_withcosts.dta", clear

set more off
* Drop the variables we don't need in this particular run to make it run quicker
keep aeattenddisp mort death30 death90 death365 trust_code cause_of_death bin

gen cause1 = substr(cause_of_death,1,1)
replace cause1="" if death30==0

encode cause1, gen(cause_str)

gen one=1
egen count = sum(one), by(cause_str)

tab cause1, gen(c_)

gen c_24=0
replace c_24=1 if count<500

forval x=1/24{
replace c_`x'=0 if c_`x'==.
}

	preserve	
	set more off
		cap drop diff_death
		
		* Collapse to correct level
		collapse (count) freq = aeattenddisp (mean) mort death30 death90 death365 c_*, by(bin)

		* Now do analysis...
		egen sumfreq = sum(freq)
		gen pfreq = freq/sumfreq
		drop sumfreq

		lab var pfreq "Proportion of patients"
		lab var bin "Exit time (10 min intervals)"
		compress


		** ANALYSIS HERE
		
		* bin polynomial variables
	forv x = 1/10 {
		gen bin`x' = bin^`x'
		}

	* parameters
	local porder = 10
	local start = 180
	local end = 240

	* waiting times - find end point
	local sumdiff = 999
	while abs(`sumdiff')>0.01 {
		
		cap drop pfreq_cf diff
			
		* display then update end point
		di "End point: `end'"
		local end = `end' + 10
		
		* compute counterfactual
		qui reg pfreq bin1-bin`porder' if bin<`start' | bin>`end'
		predict pfreq_cf, xb

		* check difference in mass
		gen diff = pfreq - pfreq_cf 
		qui su diff if bin>`start' | bin<`end'
		local sumdiff = r(sum)
		
		
		}
	drop diff

	* waiting times - compute effects
	qui reg freq bin1-bin`porder' if bin<`start' | bin>`end'
	predict freq_cf, xb
	gen wait = freq*bin 

	gen wait_cf = freq_cf*bin 
	gen wait_diff = wait_cf - wait

	su wait_diff
	local saving = r(sum)
	su freq
	local patients = r(sum)/2
	su wait_cf
	local totalwait = r(sum)/2
	su wait_diff if bin>=`start' & bin<=`end'
	local saving_int = r(sum)
	su wait_cf if bin>=`start' & bin<=`end'
	local totalwait_int = r(sum)/2
	su freq if bin>=`start' & bin<=`end'
	local patients_int = r(sum)/2
	local savingpp = round(`saving'/`patients',1)
	local savingpc = round(`saving'/`totalwait',0.01)
	local savingpp_int = round(`saving_int'/`patients_int',1)
	local savingpc_int = round(`saving_int'/`totalwait_int',0.01)
	di "Total waiting time saving (mins per patient): " `savingpp'
	di "Total waiting time saving (% of c.f. total): " `savingpc'
	di "Total waiting time saving in distorted window (mins per patient): " `savingpp_int'
	di "Total waiting time saving in disorted window (% of c.f. total): " `savingpc_int'

	* other outcomes - parameters and inputs to later calculations
	gen freq_diff = freq_cf - freq 
	egen sumfreq_pre_cf = sum(freq_cf) if bin>`start' & bin<=240
	egen sumfreq_post_diff = sum(freq_diff) if bin>240 & bin<=`end'
	egen sumfreq_pre = sum(freq) if bin>`start' & bin<=240
	gen w_pre = freq_cf/sumfreq_pre_cf
	gen w_post_diff = freq_diff/sumfreq_post_diff
	gen w = freq/sumfreq_pre
	gen nonmover_pre = sumfreq_pre_cf/sumfreq_pre
	drop *cf

	* other outcomes - compute counterfactuals and selection-only counterfactuals
	foreach var of varlist c_1 c_2 c_3 c_4 c_5 c_6 c_7 c_8 c_9 c_10 c_11 c_12 c_13 c_14 c_15 c_16 c_17 c_18 c_19 c_20 c_21 c_22 c_23 c_24 {

		* counterfactual outcomes
		qui reg `var' bin1-bin`porder' if bin<`start' | bin>=260
		
		predict `var'_cf, xb
		
		* non-movers: pre-target counterfactual average outcome 
		qui egen `var'_pre_cf = sum(`var'_cf*w_pre)
			
		* movers: post-target counterfactual average outcome (weighted by mover proportions)
		qui egen `var'_post_cf = sum(`var'_cf*w_post_diff)

		* selection-only counterfactual
		qui gen `var'_selection = nonmover_pre*`var'_pre_cf + (1-nonmover_pre)*`var'_post_cf
		
		* observed outcome (averaged over pre period)
		qui egen `var'_observed = sum(`var'*w)
		}

	format %9.3fc mort death*
	format %10.0fc freq* sumfreq*
		
	* build results table
	gen helper = 1
	collapse (mean) nonmover_pre *_pre_cf *_post_cf *_selection *_observed, by(helper)

	foreach i in c_1 c_2 c_3 c_4 c_5 c_6 c_7 c_8 c_9 c_10 c_11 c_12 c_13 c_14 c_15 c_16 c_17 c_18 c_19 c_20 c_21 c_22 c_23 c_24 {
		gen `i'_diff = `i'_observed - `i'_selection
		gen `i'_diff_pc = `i'_diff / `i'_selection
		}

	foreach i in pre_cf post_cf selection observed diff diff_pc {
		rename *_`i' `i'_* 
		}
		

	foreach x in c_1 c_2 c_3 c_4 c_5 c_6 c_7 c_8 c_9 c_10 c_11 c_12 c_13 c_14 c_15 c_16 c_17 c_18 c_19 c_20 c_21 c_22 c_23 c_24 {
	su diff_`x'
	matrix observe_`x' = r(mean)
	su diff_pc_`x'
	matrix observe_pc_`x' = r(mean)
	}
	

restore


capture program drop myboot

program define myboot, rclass

* data for aggregate analysis
	preserve

	* draw bootstrap sample
	bsample, cluster(trust_code)
	
	cap drop diff_death
	

		
		* Collapse to correct level
		collapse (count) freq = aeattenddisp (mean) c_1 c_2 c_3 c_4 c_5 c_6 c_7 c_8 c_9 c_10 c_11 c_12 c_13 c_14 c_15 c_16 c_17 c_18 c_19 c_20 c_21 c_22 c_23 c_24, by(bin)

		* Now do analysis...
		egen sumfreq = sum(freq)
		gen pfreq = freq/sumfreq
		drop sumfreq

		lab var pfreq "Proportion of patients"
		lab var bin "Exit time (10 min intervals)"
		compress


		** ANALYSIS HERE
		
		* bin polynomial variables
	forv x = 1/10 {
		gen bin`x' = bin^`x'
		}

	* parameters
	local porder = 10
	local start = 180
	local end = 300

	** waiting times - find end point
	local sumdiff = 999
	local t = 0
	while abs(`sumdiff')>0.01 & `t'<100{
		
		cap drop pfreq_cf diff
		
		* Update end point
		local end = `end' + 10
		
		* compute counterfactual
		qui reg pfreq bin1-bin`porder' if bin<`start' | bin>`end'
		predict pfreq_cf, xb

		* check difference in mass
		gen diff = pfreq - pfreq_cf 
		qui su diff if bin>`start' | bin<`end'
		local sumdiff = r(sum)
			
		* display end point
		di "End point: `end'"
		
		local t = `t'+1
		}
	drop diff

	* waiting times - compute effects
	qui reg freq bin1-bin`porder' if bin<`start' | bin>`end'
	predict freq_cf, xb
	gen wait = freq*bin 

	gen wait_cf = freq_cf*bin 
	gen wait_diff = wait_cf - wait

	su wait_diff
	local saving = r(sum)
	su freq
	local patients = r(sum)/2
	su wait_cf
	local totalwait = r(sum)/2
	su wait_diff if bin>=`start' & bin<=`end'
	local saving_int = r(sum)
	su wait_cf if bin>=`start' & bin<=`end'
	local totalwait_int = r(sum)/2
	su freq if bin>=`start' & bin<=`end'
	local patients_int = r(sum)/2
	local savingpp = round(`saving'/`patients',1)
	local savingpc = round(`saving'/`totalwait',0.01)
	local savingpp_int = round(`saving_int'/`patients_int',1)
	local savingpc_int = round(`saving_int'/`totalwait_int',0.01)
	di "Total waiting time saving (mins per patient): " `savingpp'
	di "Total waiting time saving (% of c.f. total): " `savingpc'
	di "Total waiting time saving in distorted window (mins per patient): " `savingpp_int'
	di "Total waiting time saving in disorted window (% of c.f. total): " `savingpc_int'

	* other outcomes - parameters and inputs to later calculations
	gen freq_diff = freq_cf - freq 
	egen sumfreq_pre_cf = sum(freq_cf) if bin>`start' & bin<=240
	egen sumfreq_post_diff = sum(freq_diff) if bin>240 & bin<=`end'
	egen sumfreq_pre = sum(freq) if bin>`start' & bin<=240
	gen w_pre = freq_cf/sumfreq_pre_cf
	gen w_post_diff = freq_diff/sumfreq_post_diff
	gen w = freq/sumfreq_pre
	gen nonmover_pre = sumfreq_pre_cf/sumfreq_pre
	drop *cf

	* other outcomes - compute counterfactuals and selection-only counterfactuals
	foreach var of varlist c_1 c_2 c_3 c_4 c_5 c_6 c_7 c_8 c_9 c_10 c_11 c_12 c_13 c_14 c_15 c_16 c_17 c_18 c_19 c_20 c_21 c_22 c_23 c_24 {

		* counterfactual outcomes
		qui reg `var' bin1-bin`porder' if bin<`start' | bin>=260
		
		predict `var'_cf, xb
		
		* non-movers: pre-target counterfactual average outcome 
		qui egen `var'_pre_cf = sum(`var'_cf*w_pre)
			
		* movers: post-target counterfactual average outcome (weighted by mover proportions)
		qui egen `var'_post_cf = sum(`var'_cf*w_post_diff)

		* selection-only counterfactual
		qui gen `var'_selection = nonmover_pre*`var'_pre_cf + (1-nonmover_pre)*`var'_post_cf
		
		* observed outcome (averaged over pre period)
		qui egen `var'_observed = sum(`var'*w)
		}

	format %10.0fc freq* sumfreq*
		
	* build results table
	gen helper = 1
	collapse (mean) nonmover_pre *_pre_cf *_post_cf *_selection *_observed, by(helper)

	foreach i in c_1 c_2 c_3 c_4 c_5 c_6 c_7 c_8 c_9 c_10 c_11 c_12 c_13 c_14 c_15 c_16 c_17 c_18 c_19 c_20 c_21 c_22 c_23 c_24{
			if `t'<99{
				gen `i'_diff = `i'_observed - `i'_selection
				gen `i'_diff_pc = `i'_diff / `i'_selection
			}
			
			if `t'>98{
				gen `i'_diff = .
				gen `i'_diff_pc = .
			}
				
		}

	foreach i in pre_cf post_cf selection observed diff diff_pc {
		rename *_`i' `i'_* 
		}
		
	keep diff*
	
	foreach x in c_1 c_2 c_3 c_4 c_5 c_6 c_7 c_8 c_9 c_10 c_11 c_12 c_13 c_14 c_15 c_16 c_17 c_18 c_19 c_20 c_21 c_22 c_23 c_24{
	su diff_`x'
	return scalar d_`x' = r(mean)
	su diff_pc_`x'
	return scalar d_pc_`x' = r(mean)
	/*
	su diff_pc_`x'
	return scalar d_`x' = r(mean)
	*/
	}
	
	*inpat_los revisit30 readmit30 discharge discharge_gp discharge_no
	
	restore

* END PROGRAMME DEFINE HERE	
end	


#delimit ;
* Simulate 199 times to test;
simulate diff_c_1 = r(d_c_1) diff_pc_c_1 = r(d_pc_c_1) diff_c_2 = r(d_c_2) diff_pc_c_2 = r(d_pc_c_2) diff_c_3 = r(d_c_3) diff_pc_c_3 = r(d_pc_c_3)
diff_c_4 = r(d_c_4) diff_pc_c_4 = r(d_pc_c_4) diff_c_5 = r(d_c_5) diff_pc_c_5 = r(d_pc_c_5) diff_c_6 = r(d_c_6) diff_pc_c_6 = r(d_pc_c_6)
diff_c_7 = r(d_c_7) diff_pc_c_7 = r(d_pc_c_7) diff_c_8 = r(d_c_8) diff_pc_c_8 = r(d_pc_c_8) diff_c_9 = r(d_c_9) diff_pc_c_9 = r(d_pc_c_9)
diff_c_10 = r(d_c_10) diff_pc_c_10 = r(d_pc_c_10) diff_c_11 = r(d_c_11) diff_pc_c_11 = r(d_pc_c_11) diff_c_12 = r(d_c_12) diff_pc_c_12 = r(d_pc_c_12)
diff_c_13 = r(d_c_13) diff_pc_c_13 = r(d_pc_c_13) diff_c_14 = r(d_c_14) diff_pc_c_14 = r(d_pc_c_14) diff_c_15 = r(d_c_15) diff_pc_c_15 = r(d_pc_c_15)
diff_c_16 = r(d_c_16) diff_pc_c_16 = r(d_pc_c_16) diff_c_17 = r(d_c_17) diff_pc_c_17 = r(d_pc_c_17) diff_c_18 = r(d_c_18) diff_pc_c_18 = r(d_pc_c_18)
diff_c_19 = r(d_c_19) diff_pc_c_19 = r(d_pc_c_19) diff_c_20 = r(d_c_20) diff_pc_c_20 = r(d_pc_c_20) diff_c_21 = r(d_c_21) diff_pc_c_21 = r(d_pc_c_21)
diff_c_22 = r(d_c_22) diff_pc_c_22 = r(d_pc_c_22) diff_c_23 = r(d_c_23) diff_pc_c_23 = r(d_pc_c_23) diff_c_24 = r(d_c_24) diff_pc_c_24 = r(d_pc_c_24)
, reps(199) seed(32786105): myboot;

* Boostrap;
bstat, stat(observe_c_1, observe_pc_c_1, observe_c_2, observe_pc_c_2, observe_c_3, observe_pc_c_3, observe_c_4, observe_pc_c_4, observe_c_5, observe_pc_c_5,
observe_c_6, observe_pc_c_6, observe_c_7, observe_pc_c_7, observe_c_8, observe_pc_c_8, observe_c_9, observe_pc_c_9, observe_c_10, observe_pc_c_10,
observe_c_11, observe_pc_c_11, observe_c_12, observe_pc_c_12, observe_c_13, observe_pc_c_13, observe_c_14, observe_pc_c_14, observe_c_15, observe_pc_c_15,
observe_c_16, observe_pc_c_16, observe_c_17, observe_pc_c_17, observe_c_18, observe_pc_c_18, observe_c_19, observe_pc_c_19, observe_c_20, observe_pc_c_20,
observe_c_21, observe_pc_c_21, observe_c_22, observe_pc_c_22, observe_c_23, observe_pc_c_23, observe_c_24, observe_pc_c_24);
 

#delimit cr

* Output results
putexcel set "$results\TableA4.xlsx", replace
putexcel A1 = "Variable"
putexcel B1 = "Coefficient"
putexcel C1 = "Std. error"

matrix b = e(b)'
putexcel A2 = matrix(b), rownames

matrix se = e(se)'
putexcel C2 = matrix(se)
